library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggpubr)
## Loading required package: ggplot2
library(Biostrings)
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:data.table':
##
## first, second
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:data.table':
##
## shift
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.4 ✔ stringr 1.4.0
## ✔ readr 2.0.1 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ dplyr::between() masks data.table::between()
## ✖ Biostrings::collapse() masks IRanges::collapse(), dplyr::collapse()
## ✖ BiocGenerics::combine() masks dplyr::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ IRanges::desc() masks dplyr::desc()
## ✖ S4Vectors::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ S4Vectors::first() masks dplyr::first(), data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ S4Vectors::rename() masks dplyr::rename()
## ✖ XVector::slice() masks IRanges::slice(), dplyr::slice()
## ✖ purrr::transpose() masks data.table::transpose()
library(foreach)
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
library(stringdist)
##
## Attaching package: 'stringdist'
## The following object is masked from 'package:tidyr':
##
## extract
library(doParallel)
## Loading required package: iterators
# Function to provide a closest match. Used to match HLA Alleles across mixed output styles.
ClosestMatch2 = function(string, stringVector){
stringVector[amatch(string, stringVector, maxDist=Inf)]
}
Question: If we look at ratios of binding (agretopicity), stability, and/or fraction of hydrophobicity, do we see any evidence of immune escape?
WUHAN_PEPTIDES_BINDING_SCORES=readRDS("ORIGINAL_PEPTIDES_BINDING_SCORES_V2.rds")
WUHAN_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 15
## Peptide Predicted_Bindi… `Thalf(h)` `EL-score` EL_Rank `BA-score` BA_Rank
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AADLDDFSKQL HLA-A0101,HLA-A… 0.2226,0.… 0.0085,0.… 5.3632… 0.0571,0.… 19.165…
## 2 AAGLEAPFLY HLA-A0101,HLA-A… 0.9656,0.… 0.2728,4e… 0.5874… 0.2922,0.… 0.7888…
## 3 AAGLEAPFLYLY HLA-A0101,HLA-A… 0.9528,0.… 0.0694,1e… 1.6217… 0.2495,0.… 1.0825…
## 4 AAISDYDYY HLA-A0101,HLA-A… 1.0031,0.… 0.3568,4e… 0.4523… 0.3202,0.… 0.6298…
## 5 AAISDYDYYR HLA-A0101,HLA-A… 0.3461,0.… 0.0039,4e… 8.4074… 0.1012,0.… 6.4802…
## 6 AAVYRINW HLA-A0101,HLA-A… 0.3428,0.… 4e-04,0,0… 30.964… 0.0245,0.… 62.656…
## # … with 8 more variables: Ave <chr>, Binder <chr>, nM_41 <chr>, nM <chr>,
## # Aff_Binder <chr>, Length <int>, HydrophobicCount <int>, HydroFraction <dbl>
WUHAN_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 1380
WUHAN_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(Peptide)) %>% filter(Length%in% c(9,10))%>% select(Peptide, Predicted_Binding, Binder,Length) %>% separate_rows_(cols=c("Predicted_Binding", "Binder"), sep=",") %>% distinct() %>% group_by(Predicted_Binding,Binder,Length) %>%mutate(Length = as.character(Length))%>%
dplyr::summarise(n=n())%>% filter(Binder == "BINDER")%>% arrange(desc(n))%>%
ggbarplot(x="Predicted_Binding",y="n",fill="Length",position = position_dodge())+coord_flip()
## `summarise()` has grouped output by 'Predicted_Binding', 'Binder'. You can override using the `.groups` argument.
WUHAN_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(Peptide)) %>% filter(Length%in% c(9,10))%>% select(Peptide, Predicted_Binding, Binder,Length) %>% separate_rows_(cols=c("Predicted_Binding", "Binder"), sep=",") %>% distinct() %>% filter(Predicted_Binding == "HLA-A0201")%>% group_by(Predicted_Binding,Binder,Length) %>%mutate(Length = as.character(Length))%>%
dplyr::summarise(n=n())%>% filter(Binder == "BINDER")%>% arrange(desc(n))
## `summarise()` has grouped output by 'Predicted_Binding', 'Binder'. You can override using the `.groups` argument.
## # A tibble: 2 × 4
## # Groups: Predicted_Binding, Binder [1]
## Predicted_Binding Binder Length n
## <chr> <chr> <chr> <int>
## 1 HLA-A0201 BINDER 9 162
## 2 HLA-A0201 BINDER 10 48
OMICRON_PEPTIDES_BINDING_SCORES=readRDS("OMICRON_PEPTIDES_BINDING_SCORES_V2.rds")
OMICRON_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 13
## VariantAlignment MT_Predicted_Bind… `MT_Thalf(h)` `MT_EL-score` MT_EL_Rank
## <chr> <chr> <chr> <chr> <chr>
## 1 AEYVNNSY HLA-A0101,HLA-A02… 0.2295,0.1438,… 0.0412,0,1e-0… 2.2565,55.…
## 2 AKSHNITLIW HLA-A0101,HLA-A02… 0.2395,0.1924,… 0.0048,1e-04,… 7.3782,47.…
## 3 ALRITFGGP HLA-A0101,HLA-A02… 0.0843,0.1071,… 0,9e-04,0.004… 73.4615,18…
## 4 APGQTGNIA HLA-A0101,HLA-A02… 0.0906,0.1022,… 5e-04,0,1e-04… 29.3103,52…
## 5 CNDPFLDHK HLA-A0101,HLA-A02… 0.1508,0.093,0… 0.0105,0,1e-0… 4.7481,63.…
## 6 CNDPFLDHKN HLA-A0101,HLA-A02… 0.0855,0.0757,… 2e-04,0,0,0,0… 46,90,95,9…
## # … with 8 more variables: MT_BA-score <chr>, MT_BA_Rank <chr>, MT_Ave <chr>,
## # MT_Binder <chr>, MT_nM_41 <chr>, Predicted_Binding <chr>, MT_nM <chr>,
## # MT_Aff_Binder <chr>
OMICRON_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 81
OMICRON_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(VariantAlignment)) %>% filter(Length%in% c(9,10))%>% select(VariantAlignment, MT_Predicted_Binding, MT_Binder,Length) %>% separate_rows_(cols=c("MT_Predicted_Binding", "MT_Binder"), sep=",") %>% distinct() %>% group_by(MT_Predicted_Binding,MT_Binder,Length) %>%mutate(Length = as.character(Length))%>%
dplyr::summarise(n=n())%>% filter(MT_Binder == "BINDER")%>% arrange(desc(n))%>%
ggbarplot(x="MT_Predicted_Binding",y="n",fill="Length",position = position_dodge())+coord_flip()
## `summarise()` has grouped output by 'MT_Predicted_Binding', 'MT_Binder'. You can override using the `.groups` argument.
OMICRON_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(VariantAlignment)) %>% filter(Length%in% c(9,10))%>% select(VariantAlignment, MT_Predicted_Binding, MT_Binder,Length) %>% separate_rows_(cols=c("MT_Predicted_Binding", "MT_Binder"), sep=",") %>% distinct() %>% filter(MT_Predicted_Binding == "HLA-A0201")%>% group_by(MT_Predicted_Binding,MT_Binder,Length) %>%mutate(Length = as.character(Length))%>%
dplyr::summarise(n=n())%>% filter(MT_Binder == "BINDER")%>% arrange(desc(n))
## `summarise()` has grouped output by 'MT_Predicted_Binding', 'MT_Binder'. You can override using the `.groups` argument.
## # A tibble: 2 × 4
## # Groups: MT_Predicted_Binding, MT_Binder [1]
## MT_Predicted_Binding MT_Binder Length n
## <chr> <chr> <chr> <int>
## 1 HLA-A0201 BINDER 9 7
## 2 HLA-A0201 BINDER 10 2
MUTATIONS = readRDS("OMICRON_EPITOPE_MUTATIONS.rds")
MUTATIONS %>% mutate(Length = nchar(Peptide))%>% filter(Protein == 'surface glycoprotein') %>% filter(Length== 9)%>% nrow
## [1] 28
#MUTATIONS=MUTATIONS %>% group_by(Peptide, VariantAlignment,start_pos,Mutation) %>% dplyr::summarise(Protein = paste0(Protein, collapse = ","))
MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES)%>% nrow
## Joining, by = "Peptide"
## [1] 80
MUTATIONS %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES) %>% nrow
## Joining, by = "VariantAlignment"
## [1] 80
FULL_MUTATIONS_DT=MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES) %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES)
## Joining, by = "Peptide"
## Joining, by = c("VariantAlignment", "Predicted_Binding")
#FULL_MUTATIONS_DT %>% head %>% DT::datatable()
#saveRDS(FULL_MUTATIONS_DT,file="FULL_MUTATIONS_BINDING.rds")
# mUTATED+HOMOLOGOUS oeptide
FULL_MUTATIONS_DT %>% filter(Peptide %in% "WLLWPVTLA")
## # A tibble: 1 × 32
## Peptide VariantAlignment start_pos Protein MutationPos Mutation Hamming
## <chr> <chr> <int> <chr> <chr> <chr> <dbl>
## 1 WLLWPVTLA WLLWPVTLT 55 membrane gl… 63 A63T 1
## # … with 25 more variables: Predicted_Binding <chr>, Thalf(h) <chr>,
## # EL-score <chr>, EL_Rank <chr>, BA-score <chr>, BA_Rank <chr>, Ave <chr>,
## # Binder <chr>, nM_41 <chr>, nM <chr>, Aff_Binder <chr>, Length <int>,
## # HydrophobicCount <int>, HydroFraction <dbl>, MT_Predicted_Binding <chr>,
## # MT_Thalf(h) <chr>, MT_EL-score <chr>, MT_EL_Rank <chr>, MT_BA-score <chr>,
## # MT_BA_Rank <chr>, MT_Ave <chr>, MT_Binder <chr>, MT_nM_41 <chr>,
## # MT_nM <chr>, MT_Aff_Binder <chr>
FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment) %>% distinct()
## # A tibble: 80 × 2
## Peptide VariantAlignment
## <chr> <chr>
## 1 AEHVNNSY AEYVNNSY
## 2 AKSHNIALIW AKSHNITLIW
## 3 APGQTGKIA APGQTGNIA
## 4 APRITFGGP ALRITFGGP
## 5 CNDPFLGVY CNDPFLDHK
## 6 CNDPFLGVYY CNDPFLDHKN
## 7 DGVYFASTEK DGVYFASIEK
## 8 DSKVGGNYNY DSKVSGNYNY
## 9 EELKKLLEQW EELKKLLEEW
## 10 FCNDPFLGVY FCNDPFLDHK
## # … with 70 more rows
FULL_MUTATIONS_DT %>% nrow
## [1] 80
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, EL_Rank, MT_EL_Rank, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","EL_Rank","MT_EL_Rank","Binder","MT_Binder"),sep=",")
DATA_FOR_ANALYSIS %>% nrow
## [1] 5120
AB_SUPERTYPES=fread("HLA_AB_SUPERTYPES.csv") %>% mutate(Allele = gsub("\\*","",Allele))%>% mutate(Allele = paste0("HLA-",Allele))%>% dplyr::rename(HLA_Allele = Allele)
DATA_FOR_ANALYSIS%>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% nrow
## [1] 3600
HLA_A_B_DATA_FOR_ANALYSIS =DATA_FOR_ANALYSIS%>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(EL_Rank = as.numeric(EL_Rank), MT_EL_Rank=as.numeric(MT_EL_Rank))
HLA_A_B_DATA_FOR_ANALYSIS=HLA_A_B_DATA_FOR_ANALYSIS%>% mutate(LossGain = case_when(
(Binder == "BINDER" & MT_Binder == "NONBINDER") ~ "Loss",
(Binder == "NONBINDER" & MT_Binder == "NONBINDER") ~ "Same_NonBinder",
(Binder == "NONBINDER" & MT_Binder == "BINDER") ~ "Gain",
(Binder == "BINDER" & MT_Binder == "BINDER") ~ "Same_Binder"
))
HLA_A_B_DATA_FOR_ANALYSIS %>% filter(LossGain %in% c("Loss","Gain"))%>%group_by(HLA_Allele, LossGain) %>% dplyr::summarise(n=n())%>% inner_join(AB_SUPERTYPES)%>%
ggboxplot(x="LossGain",y="n",facet.by = "Supertype")+stat_compare_means(label="p.signif")
## `summarise()` has grouped output by 'HLA_Allele'. You can override using the `.groups` argument.
## Joining, by = "HLA_Allele"
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",")%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)
#DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)%>%
# ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+rotate_x_text()
# without hlac
HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES) %>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
mutate(Supertype = gsub("HLA\\-","",Supertype))%>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%
ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%arrange((HLA_Allele))%>%
ggboxplot(x="HLA_Allele",y="Agretopicity",add="jitter")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>%
ggpaired(cond1 = "nM_41", cond2 = "MT_nM_41",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% filter(Protein == "surface glycoprotein") %>% select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",")%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)
DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)%>%
ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+rotate_x_text()
## Joining, by = "HLA_Allele"
# without hlac
HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES) %>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
mutate(Supertype = gsub("HLA\\-","",Supertype))%>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%
ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%arrange((HLA_Allele))%>%
ggboxplot(x="HLA_Allele",y="Agretopicity",add="jitter")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>% mutate(Supertype = forcats::fct_inorder(Supertype))%>%
ggbarplot(x="Supertype",y="n",fill="name",position = position_dodge())+rotate_x_text(angle=90)+ylab("# peptides w/ mutations in Omicron predicted to bind")
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>%
ggpaired(cond1 = "nM_41", cond2 = "MT_nM_41",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")
BA1_SUPERTYPE_BOXPLT=HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Supertype %in% c("A02","A03","B07","C01"))%>%
dplyr::rename(Wuhan=nM_41, Omicron = MT_nM_41)%>%
ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,nrow=1)+theme_pubr(base_size = 16)+scale_y_log10()+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Log10 Binding Affinity (nM)")
BA1_SUPERTYPE_BOXPLT
saveRDS(BA1_SUPERTYPE_BOXPLT,file="BA1_SUPERTYPE_BOXPLT.rds")
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Supertype == 'B07') %>% arrange(desc(Agretopicity))
## # A tibble: 26 × 9
## Peptide VariantAlignment HLA_Allele nM_41 MT_nM_41 Binder MT_Binder
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 SPRRARSVA SHRRARSVA HLA-B0702 4.65 4722. BINDER NONBINDER
## 2 SPRRARSVA SHRRARSVA HLA-B4201 8.90 6169. BINDER NONBINDER
## 3 SPRRARSV SHRRARSV HLA-B0702 24.4 12722. BINDER NONBINDER
## 4 SPRRARSV SHRRARSV HLA-B4201 38.6 11617. BINDER NONBINDER
## 5 FCNDPFLGVY FCNDPFLDHK HLA-B3501 991. 38357. BINDER NONBINDER
## 6 VTWFHAIHV VTWFHVISG HLA-B5101 7776. 30330. BINDER NONBINDER
## 7 VGYQPYRVV VGHQPYRVV HLA-B5101 3840. 9665. BINDER NONBINDER
## 8 NGVEGFNCY NGVAGFNCY HLA-B3501 347. 639. BINDER BINDER
## 9 YGFQPTNGV YSFRPTYGV HLA-B5101 3488. 5066. BINDER BINDER
## 10 SVLYNSASF SVLYNLAPF HLA-B3501 153. 191. BINDER BINDER
## # … with 16 more rows, and 2 more variables: Agretopicity <dbl>,
## # Supertype <chr>
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>%filter(Supertype == 'B07')%>% arrange((n))%>%
mutate(pct_change = (n/lead(n) -1) * 100)
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.
## # A tibble: 2 × 5
## Supertype name value n pct_change
## <chr> <chr> <chr> <int> <dbl>
## 1 B07 MT_Binder BINDER 19 -17.4
## 2 B07 WT_Binder BINDER 23 NA
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>%filter(Supertype == 'A01')%>% arrange((n))%>%
mutate(pct_change = (n/lead(n) -1) * 100)
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.
## # A tibble: 2 × 5
## Supertype name value n pct_change
## <chr> <chr> <chr> <int> <dbl>
## 1 A01 MT_Binder BINDER 39 -11.4
## 2 A01 WT_Binder BINDER 44 NA
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>% mutate(Supertype = forcats::fct_inorder(Supertype))%>%
ggbarplot(x="Supertype",y="n",fill="name",position = position_dodge())+rotate_x_text(angle=90)+ylab("# peptides w/ mutations in Omicron predicted to bind")
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.